In [1]:
#!/Tsan/bin/python
# -*- coding: utf-8 -*-
In [2]:
# Libraries to use
from __future__ import division
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
import mysql.connector
In [3]:
# Import My own library for factor testing
from SingleFactorTest import factorFilterFunctions as ff
#from config import *
In [4]:
%matplotlib inline
%load_ext line_profiler
In [25]:
# Analysis constants.
riskFreeRate = 0.02  # annual risk-free rate used in the Sharpe ratios below
varThreshold =0.05  # VaR threshold (5%); not used in the visible cells -- TODO confirm still needed
scaleParameter = 50  # periods per year for annualizing (weekly data, ~50 weeks)
In [26]:
# Load database connection settings and open the source (read) database.
with open('conf.json', 'r') as fd:
    conf = json.load(fd)
src_db = mysql.connector.connect(**conf['src_db'])
In [27]:
# Table names
index_data_table = 'fund_weekly_index' # index time-series data
index_name_table = 'index_id_name_mapping' # index id -> index name mapping
type_index_table = 'index_stype_code_mapping' # fund-category -> index mapping
In [28]:
# 私募指数基金分类表格对应(只需要跑一次)
# Private-fund category -> index mapping table (only needs to run once)
def get_type_index_table(tableName = type_index_table):
    """Fetch the whole fund-category/index mapping table.

    Parameters
    ----------
    tableName : str
        Mapping table name (defaults to module-level ``type_index_table``).

    Returns
    -------
    pandas.DataFrame indexed by ``stype_code``, NaN rows dropped.
    """
    # Table names cannot be bound as query parameters, so the name is
    # interpolated directly; tableName must never come from untrusted input.
    cursor = src_db.cursor()
    try:
        cursor.execute("select * from %s" % (tableName))
        result = cursor.fetchall()
        # Capture column labels from the cursor metadata before closing.
        columns = [col[0] for col in cursor.description]
    finally:
        # The original left the cursor open (try/finally: pass); release it.
        cursor.close()
    pdResult = pd.DataFrame(result)
    pdResult = pdResult.dropna(axis=0)
    pdResult.columns = columns
    pdResult.set_index('stype_code', inplace=True)
    return pdResult
In [29]:
# 私募指数名称及ID分类表格对应(只需要跑一次)
# Private-fund index names and IDs mapping table (only needs to run once)
def get_index_table(tableName = index_name_table):
    """Fetch the index id -> name mapping table.

    Parameters
    ----------
    tableName : str
        Mapping table name (defaults to module-level ``index_name_table``).

    Returns
    -------
    pandas.DataFrame indexed by ``index_id``, NaN rows dropped.
    """
    # Table names cannot be parameterized; interpolate directly (trusted
    # module-level constant only).
    cursor = src_db.cursor()
    try:
        cursor.execute("select * from %s" % (tableName))
        result = cursor.fetchall()
        columns = [col[0] for col in cursor.description]
    finally:
        # The original left the cursor open (try/finally: pass); release it.
        cursor.close()
    pdResult = pd.DataFrame(result)
    pdResult = pdResult.dropna(axis=0)
    pdResult.columns = columns
    pdResult.set_index('index_id', inplace=True)
    return pdResult
In [30]:
# 私募指数净值的时间序列
# Net-asset-value time series of one private-fund index
def get_index(index, tableName = index_data_table):
    """Load the net-worth time series for one index id.

    Parameters
    ----------
    index : str
        Index id (e.g. the first entry of ``indexIDdf.index``).
    tableName : str
        Source table holding (index_id, statistic_date, index_value).

    Returns
    -------
    pandas.DataFrame with columns ['index', 'net_worth'] indexed by date,
    duplicates and NaN rows removed.
    """
    cursor = src_db.cursor()
    try:
        # The table name cannot be parameterized, but the index value is now
        # bound as a real query parameter instead of being string-formatted
        # into the SQL (avoids injection / quoting bugs).
        sql = ("select index_id,statistic_date,index_value from %s "
               "where index_id = %%s" % tableName)
        cursor.execute(sql, (index,))
        result = cursor.fetchall()
    finally:
        # The original left the cursor open; release it.
        cursor.close()
    pdResult = pd.DataFrame(result, dtype=float)
    pdResult.columns = ['index', 'date', 'net_worth']
    pdResult = pdResult.drop_duplicates().set_index('date')
    # Drop incomplete rows.  The original also called
    # fillna(method='ffill') *after* dropna, which is a no-op once NaNs are
    # gone, so that dead statement is removed here.
    pdResult = pdResult.dropna(axis=0)
    return pdResult
In [31]:
indexIDdf = get_index_table()
indexIDdf
Out[31]:
# Load the first index's NAV series and label each row with its year and
# (year, month) bucket.
allfundindex = get_index(indexIDdf.index[0], tableName=index_data_table)
allfundindex['year'] = allfundindex.index.map(lambda x: x.year)
allfundindex['month'] = allfundindex.index.map(lambda x: (x.year, x.month))

# First-to-last net-worth change per calendar month / year.
groupgeneratorbymonth = allfundindex.groupby('month')
groupgeneratorbyyear = allfundindex.groupby('year')
annual_first = groupgeneratorbyyear['net_worth'].first()
annual_last = groupgeneratorbyyear['net_worth'].last()
annual_pnl_FI01 = (annual_last - annual_first) / annual_first
monthly_first = groupgeneratorbymonth['net_worth'].first()
monthly_last = groupgeneratorbymonth['net_worth'].last()
monthly_pnl_FI01 = (monthly_last - monthly_first) / monthly_first

# Series -> one-row DataFrames named after the index id.
annual_pnl_FI01 = pd.DataFrame(annual_pnl_FI01)
annual_pnl_FI01.columns = [indexIDdf.index[0]]
annual_pnl_FI01 = annual_pnl_FI01.T.round(5)

monthly_pnl_FI01 = pd.DataFrame(monthly_pnl_FI01).round(5)
monthly_pnl_FI01.columns = [indexIDdf.index[0]]
monthly_pnl_FI01 = monthly_pnl_FI01.T
In [32]:
annual_pnl_FI01
In [ ]:
monthly_pnl_FI01
In [33]:
# 按季度分类
# Map a date to a "year_quarter" season label
def byseasons(x):
    """Return the quarter label for a date, e.g. '2016_3' for 2016-08-15.

    x must expose .year and .month (datetime / Timestamp).
    """
    quarter = (x.month - 1) // 3 + 1
    return '%d_%d' % (x.year, quarter)
In [34]:
# Label each observation with its quarter and compute period-over-period returns.
allfundindex['season'] = allfundindex.index.map(byseasons)
allfundindex['pnl'] = allfundindex['net_worth'].pct_change()
In [35]:
# 计算最大回撤,最大回撤开始结束时间
# Max drawdown and its start (peak) / end (trough) dates
def cal_max_dd_indicator(networthSeries):
    """Maximum drawdown of a net-worth series.

    Parameters
    ----------
    networthSeries : pandas.Series
        Net worth indexed by date, in chronological order.

    Returns
    -------
    (max_dd, start_date, end_date) where max_dd = 1 - trough/peak,
    start_date is the date of the running peak and end_date the trough date.
    """
    # Vectorized: drawdown at t is 1 - nav_t / max(nav_0..nav_t).
    # The original rebuilt the running max inside a Python loop (O(n^2)),
    # relied on the long-deprecated DataFrame.is_copy attribute, and indexed
    # with the *uncalled* bound method `.idxmax` instead of `.idxmax()`.
    drawdown = 1 - networthSeries / networthSeries.cummax()
    end_date = drawdown.idxmax()  # first date where drawdown is deepest
    start_date = networthSeries.loc[:end_date].idxmax()  # preceding peak
    return drawdown.max(), start_date, end_date
In [36]:
# Quarterly max drawdown of the first index, keyed by season label.
seasonList = sorted(list(set(allfundindex['season'].values)))
#maxdd =pd.DataFrame(index = seasonList,columns = [indexIDdf.index[0]],dtype = float)
maxdd_dict = {}
for season in seasonList:
    # Restrict to one quarter and take only the drawdown magnitude ([0]).
    temp = allfundindex[allfundindex['season'] == season]
    maxdd_dict[season] = np.round(cal_max_dd_indicator(temp['net_worth'])[0],5)
In [37]:
# Dict -> one-column DataFrame; column named after the index id.
maxdd_df = pd.DataFrame([maxdd_dict]).T
maxdd_df.columns =[indexIDdf.index[0]]
maxdd_df.index.name = 'season'
maxdd_df
Out[37]:
In [38]:
# 计算最大回撤(每季度)
# Per-quarter max drawdown for one index
def cal_maxdd_by_season(df):
    """Max drawdown of 'net_worth' within each season of ``df``.

    df must carry 'season', 'net_worth' and 'index' columns.  Returns a
    one-column DataFrame (column = index id) indexed by season label.
    """
    maxdd_by_label = {}
    for label in sorted(set(df['season'].values)):
        subset = df[df['season'] == label]
        maxdd_by_label[label] = np.round(
            cal_max_dd_indicator(subset['net_worth'])[0], 4)
    result = pd.DataFrame([maxdd_by_label]).T
    result.columns = [df['index'].iloc[0]]
    result.index.name = 'season'
    return result
In [39]:
# 计算最大回撤(每年)
# Per-year max drawdown for one index
def cal_maxdd_by_year(df):
    """Max drawdown of 'net_worth' within each calendar year of ``df``.

    df must carry 'year', 'net_worth' and 'index' columns.  Returns a
    one-column DataFrame (column = index id) indexed by year.
    (Locals renamed from the copy-pasted 'season' wording of the original.)
    """
    maxdd_by_year = {}
    for year in sorted(set(df['year'].values)):
        subset = df[df['year'] == year]
        maxdd_by_year[year] = np.round(
            cal_max_dd_indicator(subset['net_worth'])[0], 4)
    result = pd.DataFrame([maxdd_by_year]).T
    result.columns = [df['index'].iloc[0]]
    result.index.name = 'year'
    return result
In [40]:
# Compute quarterly indicators
maxddbyseason = pd.DataFrame()  # quarterly max drawdown
retbyseason = pd.DataFrame()  # quarterly return (annualized)
stdbyseason = pd.DataFrame()  # quarterly standard deviation (annualized)
sharpebyseason = pd.DataFrame()  # quarterly Sharpe ratio
# Compute annual indicators
maxddbyyear= pd.DataFrame()  # annual max drawdown
retbyyear = pd.DataFrame()  # annual return
stdbyyear = pd.DataFrame()  # annual standard deviation
sharpebyyear = pd.DataFrame()  # annual Sharpe ratio
# One column per index id; outer-merge so indices with missing periods align.
for index in indexIDdf.index:
    # Quarterly indicators
    indexdf = get_index(index,tableName =index_data_table)
    indexdf['pnl'] = indexdf['net_worth'].pct_change()
    indexdf['season'] = indexdf.index.map(byseasons)
    indexdf['year'] = indexdf.index.map(lambda x : x.year)
    maxdd_season = cal_maxdd_by_season(indexdf)
    maxddbyseason = maxddbyseason.merge(maxdd_season,how='outer',left_index=True,right_index =True)
    indexbyseason = indexdf.groupby('season')['pnl']
    # scaleParameter (~50 weekly periods per year) annualizes mean and std.
    ret_season = (indexbyseason .mean()+1)**scaleParameter - 1 # annualized return (quarterly buckets)
    std_season = np.sqrt(scaleParameter) * indexbyseason.std() # annualized std (quarterly buckets)
    sharpe_season = (ret_season - riskFreeRate) / std_season # Sharpe ratio (quarterly buckets)
    ret_season = pd.DataFrame(ret_season).round(4) # Series -> DataFrame
    ret_season.columns =[indexdf['index'].iloc[0]] # column named after the index id
    std_season = pd.DataFrame(std_season).round(4)
    std_season.columns =[indexdf['index'].iloc[0]]
    sharpe_season = pd.DataFrame(sharpe_season).round(4)
    sharpe_season.columns = [indexdf['index'].iloc[0]]
    retbyseason = retbyseason.merge(ret_season,how='outer',left_index=True,right_index =True)
    stdbyseason = stdbyseason .merge(std_season,how='outer',left_index=True,right_index =True)
    sharpebyseason = sharpebyseason.merge(sharpe_season,how='outer',left_index=True,right_index =True)
    # Annual indicators (the original's inline comments here said "quarterly"
    # -- a copy-paste leftover; these are the yearly buckets)
    maxdd_year = cal_maxdd_by_year(indexdf)
    maxddbyyear = maxddbyyear.merge(maxdd_year ,how='outer',left_index=True,right_index =True)
    indexbyyear = indexdf.groupby('year')['pnl']
    ret_year = (indexbyyear .mean()+1)**scaleParameter - 1 # annualized return (annual buckets)
    std_year = np.sqrt(scaleParameter) * indexbyyear.std() # annualized std (annual buckets)
    sharpe_year = (ret_year - riskFreeRate) / std_year # Sharpe ratio (annual buckets)
    ret_year = pd.DataFrame(ret_year).round(4) # Series -> DataFrame
    ret_year.columns =[indexdf['index'].iloc[0]] # column named after the index id
    std_year = pd.DataFrame(std_year).round(4)
    std_year.columns =[indexdf['index'].iloc[0]]
    sharpe_year = pd.DataFrame(sharpe_year).round(4)
    sharpe_year.columns = [indexdf['index'].iloc[0]]
    retbyyear = retbyyear.merge(ret_year,how='outer',left_index=True,right_index =True)
    stdbyyear = stdbyyear .merge(std_year,how='outer',left_index=True,right_index =True)
    sharpebyyear = sharpebyyear.merge(sharpe_year,how='outer',left_index=True,right_index =True)
In [41]:
indexIDdf.index[3:]
Out[41]:
In [42]:
retbyseason.tail()
Out[42]:
In [43]:
retbyyear.tail()
Out[43]:
In [44]:
stdbyseason.tail()
Out[44]:
In [45]:
stdbyyear.tail()
Out[45]:
In [46]:
sharpebyseason.tail()
Out[46]:
In [47]:
sharpebyyear.tail()
Out[47]:
In [48]:
maxddbyseason.tail()
Out[48]:
In [49]:
maxddbyyear.tail()
Out[49]:
In [50]:
# Debug cell (Python 2 print statement): walk rows of the annual
# max-drawdown frame.  NOTE(review): `row` deliberately leaks out of this
# loop and is reused by the scratch cells below -- fragile on re-run.
for row in maxddbyyear.iterrows():
    1  # no-op placeholder
    print row[1].index.tolist()
In [51]:
[row[0]]+ row[1].index.tolist()
Out[51]:
In [52]:
row[1].values.tolist()
Out[52]:
In [53]:
field_names = ('field1', 'field2', 'field3')
','.join(field_names)
Out[53]:
In [54]:
## The database driver cannot accept numpy.float64 etc.; convert to native Python types!!
class NumpyMySQLConverter(mysql.connector.conversion.MySQLConverter):
    """ A mysql.connector Converter that handles Numpy types """
    # Hook names follow mysql-connector's `_<typename>_to_mysql` convention
    # and are looked up by the library, so they must not be renamed.
    def _float32_to_mysql(self, value):
        # numpy.float32 -> built-in float
        return float(value)
    def _float64_to_mysql(self, value):
        # numpy.float64 -> built-in float
        return float(value)
    def _int32_to_mysql(self, value):
        # numpy.int32 -> built-in int
        return int(value)
    def _int64_to_mysql(self, value):
        # numpy.int64 -> built-in int
        return int(value)
# Open the result (write) database and install the numpy converter;
# `cur` is reused by the ad-hoc cells further down.
res_db = mysql.connector.connect(**conf['res_db'])
res_db.set_converter_class(NumpyMySQLConverter)
cur = res_db.cursor()
In [55]:
# Insert a list into MySQL; the target table must already exist!
def insert_one_row(datalist):
    """Insert one row into the ``index_maxdd_by_year`` table.

    Parameters
    ----------
    datalist : list
        [year, FI01, ..., FI13] -- must match the table's row length exactly.
    """
    res_db = mysql.connector.connect(**conf['res_db'])
    res_db.set_converter_class(NumpyMySQLConverter)  # numpy -> native Python
    try:
        cur = res_db.cursor()
        try:
            cur.execute('INSERT index_maxdd_by_year SET '
                        'year = %s, FI01=%s, FI02=%s, FI03=%s, FI04=%s,'
                        'FI05=%s, FI06=%s, FI07=%s, FI08=%s, FI09=%s,'
                        'FI10=%s, FI11=%s, FI12=%s, FI13=%s',
                        datalist)
            res_db.commit()
        finally:
            cur.close()
    finally:
        # The original leaked the connection whenever execute() raised.
        res_db.close()
In [56]:
#map(lambda x: x+'=%s',['s','d'])
In [57]:
#insert_one_row([2021]+list(np.random.rand(13)))
In [58]:
def upsert_indexdf(df):
    """Insert every row of ``df`` into ``index_maxdd_by_year``.

    NOTE(review): a later cell defines another ``upsert_indexdf`` targeting a
    different table, silently shadowing this one -- rename one of the two.
    """
    res_db = mysql.connector.connect(**conf['res_db'])
    res_db.set_converter_class(NumpyMySQLConverter)  # numpy -> native Python
    try:
        cur = res_db.cursor()
        try:
            for row in df.iterrows():
                # NaN is not valid for MySQL; replace with None -> NULL.
                row[1][pd.isnull(row[1])] = None
                cur.execute('INSERT index_maxdd_by_year SET '
                            'year = %s, FI01=%s, FI02=%s, FI03=%s, FI04=%s,'
                            'FI05=%s, FI06=%s, FI07=%s, FI08=%s, FI09=%s,'
                            'FI10=%s, FI11=%s, FI12=%s, FI13=%s',
                            [row[0]] + row[1].values.tolist())
            # Commit once after all rows so the batch is atomic.
            res_db.commit()
        finally:
            cur.close()
    finally:
        # The original leaked cursor and connection on any exception.
        res_db.close()
In [59]:
# Scratch cell: re-runs a single INSERT using the `row` variable that leaked
# out of the earlier iterrows() loop -- only works after that loop has run.
cur.execute('INSERT index_maxdd_by_year SET '
'year = %s, FI01=%s, FI02=%s, FI03=%s, FI04=%s,'
'FI05=%s, FI06=%s, FI07=%s, FI08=%s, FI09=%s,'
'FI10=%s, FI11=%s, FI12=%s, FI13=%s',
[row[0]]+ row[1].values.tolist())
#res_db.commit()  # commit would flush every statement executed so far!!
In [60]:
np.random.rand(13)
Out[60]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [61]:
def upsert_indexdf(df):
    """Upsert every row of ``df`` into ``fund_indexdf``.

    Uses INSERT ... ON DUPLICATE KEY UPDATE, so the parameter list is the
    row repeated twice (once for the INSERT clause, once for the UPDATE).
    ``df`` index = fund_id; columns must match the field order below.
    """
    res_db = mysql.connector.connect(**conf['res_db'])
    # Consistent with the other writer: coerce numpy scalars to native types.
    res_db.set_converter_class(NumpyMySQLConverter)
    try:
        cur = res_db.cursor()
        try:
            for row in df.iterrows():
                # NaN -> None so MySQL stores NULL.
                row[1][pd.isnull(row[1])] = None
                # BUG FIX: the original had no space between 'Volatility=%s'
                # and 'ON DUPLICATE KEY UPDATE', producing the invalid SQL
                # fragment 'Volatility=%sON DUPLICATE KEY UPDATE'.
                cur.execute(
                    'INSERT fund_indexdf SET '
                    'fund_id = %s, fund_name=%s, fund_manager_nominal=%s, foundation_date=%s, end_date=%s,'
                    'fund_status=%s, type_name=%s, fund_manager=%s, data_end_date=%s, cum_net=%s,'
                    'return_annualized=%s, max_dd=%s, max_dd_start_date=%s, max_dd_end_date=%s,'
                    'std_annualized=%s, downsideRisk_annualized=%s, odds=%s, sharpe_ratio=%s, '
                    'calmar_ratio=%s,sortino_ratio=%s, alpha=%s, beta_2=%s, Beta=%s, Value=%s, '
                    'EarningYield=%s, Growth=%s, Leverage=%s, Liquidity=%s, Momentum=%s, '
                    'NonLinearSize=%s, Size=%s, Volatility=%s '
                    'ON DUPLICATE KEY UPDATE '
                    'fund_name=%s, fund_manager_nominal=%s, foundation_date=%s, end_date=%s,'
                    'fund_status=%s, type_name=%s, fund_manager=%s, data_end_date=%s, cum_net=%s,'
                    'return_annualized=%s, max_dd=%s, max_dd_start_date=%s, max_dd_end_date=%s,'
                    'std_annualized=%s, downsideRisk_annualized=%s, odds=%s, sharpe_ratio=%s, '
                    'calmar_ratio=%s,sortino_ratio=%s, alpha=%s, beta_2=%s, Beta=%s, Value=%s, '
                    'EarningYield=%s, Growth=%s, Leverage=%s, Liquidity=%s, Momentum=%s, '
                    'NonLinearSize=%s, Size=%s, Volatility=%s',
                    [row[0]] + row[1].tolist() * 2
                )
            res_db.commit()
        finally:
            cur.close()
    finally:
        # The original leaked cursor and connection on any exception.
        res_db.close()
# Persist the indicator frames via SQLAlchemy / DataFrame.to_sql.
# SECURITY NOTE(review): credentials and host are hard-coded below; read
# them from conf.json like the mysql.connector connections instead.
from sqlalchemy import create_engine
import sqlalchemy.engine.url as url

# BUG FIX: the original passed encoding='utf-8' to str.format(), where it
# was silently ignored as an unused kwarg; it belongs to create_engine.
db_engine = create_engine(
    'mysql://{0}:{1}@{2}:{3}/{4}'.format(
        'tai', 'tai2015', '119.254.153.20', 13311, 'PrivateEquityFund_W'),
    encoding='utf-8')

maxddbyseason.reset_index().to_sql(name='index_maxdd_by_season', con=db_engine, if_exists='replace', index=False)
retbyseason.reset_index().to_sql(name='index_return_by_season', con=db_engine, if_exists='replace', index=False)
stdbyseason.reset_index().to_sql(name='index_std_by_season', con=db_engine, if_exists='replace', index=False)
sharpebyseason.reset_index().to_sql(name='index_sharpe_by_season', con=db_engine, if_exists='replace', index=False)

maxddbyyear.reset_index().to_sql(name='index_maxdd_by_year', con=db_engine, if_exists='replace', index=False)
retbyyear.reset_index().to_sql(name='index_return_by_year', con=db_engine, if_exists='replace', index=False)
stdbyyear.reset_index().to_sql(name='index_std_by_year', con=db_engine, if_exists='replace', index=False)
sharpebyyear.reset_index().to_sql(name='index_sharpe_by_year', con=db_engine, if_exists='replace', index=False)
In [62]:
gene = allfundindex.groupby('season')['pnl']
In [63]:
(0.017056+1)**50 -1
Out[63]:
In [ ]:
In [64]:
((gene .mean()+1)**scaleParameter-1)
Out[64]:
In [65]:
((gene .mean()+1)**scaleParameter-1)/ (np.sqrt(scaleParameter) * gene.std())
Out[65]:
In [ ]:
In [ ]:
In [ ]:
In [66]:
In [67]:
In [68]:
Out[68]:
In [ ]:
In [69]:
In [70]:
Out[70]:
In [ ]: